* This file uses the optimal polynomial, and produces the results for column 4 of Table A2. 
* Note: NEED TO RUN AppA_make_polystar.do first to calculate correct polynomial

use "$saveddata/data_complete_withcosts.dta", clear

set more off	
	
		gen helper = 1
		merge m:1 helper using "$saveddata/lookup_polystar_agg.dta"
		keep if _merge==3
		drop _merge
		drop helper
		
		/* 'optimal' case doesn't converge for death30 - visual inspection shows the graph doesn't fit very well, and porder=10 is the best fit visually */
		replace polystar_death30 = 10
		
	preserve	
	cap drop diff_death

		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) tcount icount admit discharge* ///
		inpat_los = spell_los inpat_proc = number_proc ///
		death* othexit middur tretdur interdur initdur ///
		d_* costem30_all costae30_all cost30_aeem polystar*, by(bin)

		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		
		
		compress

		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/15 {
		gen bin`x' = bin^`x'
		}

	* parameters
	su polystar_wait
	local porder = r(mean)
	local start = 180
	local end = 240

	* waiting times - find end point
	local sumdiff = 999
	while abs(`sumdiff')>0.01 {
		
		cap drop pfreq_cf diff
		
		* display then update end point
		di "End point: `end'"
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
			
		
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* weights for pre-threshold test
	egen sumfreq_pre_obs = sum(freq) if bin>`start' & bin<=240
	gen w_pre_obs = freq/sumfreq_pre_obs

	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	gen w_pre_cf = freq_cf/sumfreq_pre_cf

	gen freq_diff = freq_cf - freq 
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	gen w_post_diff = freq_diff/sumfreq_post_diff

	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre_obs
	
	* weights for DP test
	egen sumfreq_dp_obs = sum(freq) if bin>`start'& bin<=`end'
	gen w_dp_obs = freq/sumfreq_dp_obs

	egen sumfreq_dp_cf = sum(freq_cf) if bin>`start'& bin<=`end'
	gen w_dp_cf = freq_cf/sumfreq_dp_cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{

		* counterfactual outcomes
		qui su polystar_`var'
		local porder = r(mean)
		
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		predict `var'_cf, xb
		
		* pre-threshold test

			* non-movers: pre-target counterfactual average outcome 
			qui egen pre_`var'_pre_cf = sum(`var'_cf*w_pre_cf)
				
			* movers: post-target counterfactual average outcome (weighted by mover proportions)
			qui egen pre_`var'_post_cf = sum(`var'_cf*w_post_diff)

			* composition-adjusted counterfactual
			qui gen pre_`var'_cac = nonmover_pre*pre_`var'_pre_cf + (1-nonmover_pre)*pre_`var'_post_cf
			
			* observed outcome 
			qui egen pre_`var'_obs = sum(`var'*w_pre_obs)
			
		* Diamond-Pearson test
	
			* counterfactual
			qui egen dp_`var'_cf = sum(`var'_cf*w_dp_cf)
				
			* observed outcome 
			qui egen dp_`var'_obs = sum(`var'*w_dp_obs)

		}

	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre pre_* dp_*, by(helper)

	foreach i in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{
		gen diff_`i' = pre_`i'_obs - pre_`i'_cac
		gen diff_pc_`i' = diff_`i' / pre_`i'_cac
		
		gen dp_diff_`i' = dp_`i'_obs - dp_`i'_cf
		gen dp_diff_pc_`i' = dp_diff_`i' / dp_`i'_cf
		}
		
	foreach x in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{
		su diff_`x'
		matrix observe_`x' = r(mean)
		su diff_pc_`x'
		matrix observe_pc_`x' = r(mean)
		
		su dp_diff_`x'
		matrix observe_dp_`x' = r(mean)
		su dp_diff_pc_`x'
		matrix observe_pc_dp_`x' = r(mean)
		}
		

restore

set more off

capture program drop myboot

program define myboot, rclass

* data for aggregate analysis
	preserve

	* draw bootstrap sample
	gen trust_code = substr(site_code,1,3)
	bsample, cluster(trust_code)
	
	cap drop diff_death
	cap drop *imd*
	
set more off
		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) tcount icount admit discharge* ///
		inpat_los = spell_los inpat_proc = number_proc ///
		death* othexit middur tretdur interdur initdur ///
		d_* costem30_all costae30_all cost30_aeem polystar*, by(bin)


		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress

		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/15 {
		gen bin`x' = bin^`x'
		}

	* parameters
	su polystar_wait
	local porder = r(mean)
	local start = 180
	local end = 240

	* waiting times - find end point
	local sumdiff = 999
	while abs(`sumdiff')>0.01 {
		
		cap drop pfreq_cf diff
		
		* display then update end point
		di "End point: `end'"
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
			
		
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* weights for pre-threshold test
	egen sumfreq_pre_obs = sum(freq) if bin>`start' & bin<=240
	gen w_pre_obs = freq/sumfreq_pre_obs

	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	gen w_pre_cf = freq_cf/sumfreq_pre_cf

	gen freq_diff = freq_cf - freq 
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	gen w_post_diff = freq_diff/sumfreq_post_diff

	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre_obs
	
	* weights for DP test
	egen sumfreq_dp_obs = sum(freq) if bin>`start'& bin<=`end'
	gen w_dp_obs = freq/sumfreq_dp_obs

	egen sumfreq_dp_cf = sum(freq_cf) if bin>`start'& bin<=`end'
	gen w_dp_cf = freq_cf/sumfreq_dp_cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{

		* counterfactual outcomes
		qui su polystar_`var'
		local porder = r(mean)
		
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		predict `var'_cf, xb
		
		* pre-threshold test

			* non-movers: pre-target counterfactual average outcome 
			qui egen pre_`var'_pre_cf = sum(`var'_cf*w_pre_cf)
				
			* movers: post-target counterfactual average outcome (weighted by mover proportions)
			qui egen pre_`var'_post_cf = sum(`var'_cf*w_post_diff)

			* composition-adjusted counterfactual
			qui gen pre_`var'_cac = nonmover_pre*pre_`var'_pre_cf + (1-nonmover_pre)*pre_`var'_post_cf
			
			* observed outcome 
			qui egen pre_`var'_obs = sum(`var'*w_pre_obs)
			
		* DP test
	
			* counterfactual
			qui egen dp_`var'_cf = sum(`var'_cf*w_dp_cf)
				
			* observed outcome 
			qui egen dp_`var'_obs = sum(`var'*w_dp_obs)

		}


	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre pre_* dp_*, by(helper)

	foreach i in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{
		gen diff_`i' = pre_`i'_obs - pre_`i'_cac
		gen diff_pc_`i' = diff_`i' / pre_`i'_cac
		
		gen dp_diff_`i' = dp_`i'_obs - dp_`i'_cf
		gen dp_diff_pc_`i' = dp_diff_`i' / dp_`i'_cf
		}
		
	foreach x in admit discharge othexit icount tcount inpat_los inpat_proc costem30_all costae30_all cost30_aeem death30 death90 death365{
	su diff_`x'
	return scalar d_`x' = r(mean)
	su diff_pc_`x'
	return scalar d_pc_`x' = r(mean)
	
	su dp_diff_`x'
	return scalar d_dp_`x' = r(mean)
	su dp_diff_pc_`x'
	return scalar d_pc_dp_`x' = r(mean)
		
		
		}
	
	restore

* END PROGRAMME DEFINE HERE	
end	


#delimit ;
* Simulate 199 times;

simulate diff_admit = r(d_admit)  		
diff_discharge = r(d_discharge) 
diff_othexit = r(d_othexit)  		
diff_icount = r(d_icount) 	
diff_tcount = r(d_tcount) 	
diff_inpat_los = r(d_inpat_los) 
diff_inpat_proc = r(d_inpat_proc) 
diff_costae30_all = r(d_costae30_all) 	
diff_costem30_all = r(d_costem30_all) 
diff_cost30_aeem = r(d_cost30_aeem) 
diff_death30 = r(d_death30) 			
, reps(199) seed(32786105): myboot;


#delimit ;
* Boostrap;
bstat, stat(observe_admit, 
observe_discharge, 
observe_othexit, 
observe_icount, 
observe_tcount, 
observe_inpat_los, 
observe_inpat_proc, 
observe_costae30_all, 
observe_costem30_all,  
observe_cost30_aeem, 
observe_death30
);


#delimit cr
* Output results
putexcel set "$results/TableA2_col4.xlsx", replace
putexcel A1 = "Variable"
putexcel B1 = "Coefficient"
putexcel C1 = "Std. error"

matrix b = e(b)'
putexcel A2 = matrix(b), rownames

matrix se = e(se)'
putexcel C2 = matrix(se)
